import { noopTest, edit } from "./helpers.js";

/**
 * Block-Level Grammar
 *
 * Rule templates for block-level constructs. Several entries still contain
 * bare placeholder words (`paragraph`, `bull`, `comment`, `tag`, `attribute`,
 * `label`, `title`, and the alternatives listed in `_paragraph`); those are
 * substituted by the `edit(...)` calls that follow this object.
 */
export const block = {
	// One or more lines made up only of spaces.
	newline: /^(?: *(?:\n|$))+/,
	// Lines indented by four spaces, together with blank lines that follow them.
	code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
	// Fenced code. Group 1 is the opening fence (3+ backticks or 3+ tildes),
	// group 2 the rest of the opening line, group 3 the body. The match ends at
	// a line that repeats the fence (`\1`) or at the end of the input.
	fences:
		/^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
	// Three or more `-`, `_` or `*`, each optionally followed by tabs/spaces,
	// on a line indented by at most three spaces.
	hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
	// One to six `#` (group 1) that must be followed by whitespace or the end of
	// the input; group 2 is the remainder of the line.
	heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
	// One or more lines starting with `>`. `paragraph` is a placeholder.
	blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
	// First line of a list: the marker placeholder `bull` (group 1) and the
	// optional content after a space or tab (group 2).
	list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
	// Template string with seven numbered alternatives; `comment`, `tag` and
	// `attribute` are placeholders.
	html:
		"^ {0,3}(?:" + // optional indentation
		"<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)" + // (1)
		"|comment[^\\n]*(\\n+|$)" + // (2)
		"|<\\?[\\s\\S]*?(?:\\?>\\n*|$)" + // (3)
		"|<![A-Z][\\s\\S]*?(?:>\\n*|$)" + // (4)
		"|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)" + // (5)
		"|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)" + // (6)
		"|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)" + // (7) open tag
		"|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)" + // (7) closing tag
		")",
	// Link reference definition template: `[label]:`, a destination (bare or in
	// angle brackets), and an optional `title`.
	def: /^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
	// Set to `noopTest` here (presumably a rule that never matches — confirm in
	// helpers.js); `block.gfm` overrides it with a real table rule.
	table: noopTest,
	// Text (group 1) followed by an underline made of `=` or `-` (group 2).
	lheading: /^((?:.|\n(?!\n))+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
	// regex template, placeholders will be replaced according to different paragraph
	// interruption rules of commonmark and the original markdown spec:
	_paragraph:
		/^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
	// Everything up to the next newline.
	text: /^[^\n]+/
};

// Label of a link reference: escaped characters or anything except brackets
// and backslashes; the lookahead rejects a whitespace-only label.
block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
// Title of a link reference: double-quoted, single-quoted or parenthesised.
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;

// Fill the `label` and `title` placeholders of the `def` template, in order.
const defSubstitutions = [
	["label", block._label],
	["title", block._title]
];
block.def = defSubstitutions
	.reduce(
		(builder, [placeholder, pattern]) => builder.replace(placeholder, pattern),
		edit(block.def)
	)
	.getRegex();

// A list marker: `*`, `+`, `-`, or one to nine digits followed by `.` or `)`.
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;

// Start of a list item: leading spaces (group 1), the marker (group 2) and
// any spaces that follow it.
const listItemStartTemplate = edit(/^( *)(bull) */);
block.listItemStart = listItemStartTemplate
	.replace("bull", block.bullet)
	.getRegex();

// Lookaheads offered to the list template under the names `hr` and `def`.
// NOTE(review): the `list` template declared in this file contains no `hr` or
// `def` placeholder, so these two substitutions presumably change nothing —
// confirm against `edit()` before removing them.
const listHrLookahead =
	"\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))";
const listDefLookahead = `\\n+(?=${block.def.source})`;

// Substitutions are applied strictly in this order.
const listSubstitutions = [
	[/bull/g, block.bullet],
	["hr", listHrLookahead],
	["def", listDefLookahead]
];

let listBuilder = edit(block.list);
for (const [placeholder, replacement] of listSubstitutions) {
	listBuilder = listBuilder.replace(placeholder, replacement);
}
block.list = listBuilder.getRegex();

// Tag names substituted for the `tag` placeholder, as one `|`-separated
// alternation. Each array entry is one group of names; joining with "|"
// yields the single alternation string.
block._tag = [
	"address|article|aside|base|basefont|blockquote|body|caption",
	"center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption",
	"figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe",
	"legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option",
	"p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr",
	"track|ul"
].join("|");

// An HTML comment; at block level it may also run to the end of the input.
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;

// A single attribute (name with optional quoted or unquoted value) as used in
// alternative (7) of the html template.
const htmlBlockAttribute =
	/ +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/;

// Compile the html template case-insensitively after filling its placeholders.
const htmlBlockBuilder = edit(block.html, "i");
block.html = htmlBlockBuilder
	.replace("comment", block._comment)
	.replace("tag", block._tag)
	.replace("attribute", htmlBlockAttribute)
	.getRegex();

// Paragraph rule: each pair names a placeholder in `_paragraph` and the pattern
// that may interrupt a paragraph in its place. The pairs are applied in order;
// `html` introduces a `tag` placeholder that the last pair then fills.
block.paragraph = [
	["hr", block.hr],
	["heading", " {0,3}#{1,6} "],
	["|lheading", ""], // setext headings don't interrupt commonmark paragraphs
	["|table", ""],
	["blockquote", " {0,3}>"],
	["fences", " {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n"],
	["list", " {0,3}(?:[*+-]|1[.)]) "], // only lists starting from 1 can interrupt
	["html", "</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)"],
	["tag", block._tag] // paragraphs can be interrupted by type (6) html blocks
]
	.reduce(
		(builder, [placeholder, replacement]) =>
			builder.replace(placeholder, replacement),
		edit(block._paragraph)
	)
	.getRegex();

// Blockquote lines may continue with paragraph text, so the finished
// paragraph rule is inserted for the `paragraph` placeholder.
const blockquoteTemplate = edit(block.blockquote);
block.blockquote = blockquoteTemplate
	.replace("paragraph", block.paragraph)
	.getRegex();

/**
 * Normal Block Grammar
 *
 * Shallow copy of every property assigned to `block` up to this point. The
 * `gfm` and `pedantic` grammars below start from this copy.
 */

block.normal = { ...block };

/**
 * GFM Block Grammar
 */

// Table template in three parts. The bare words in the cells part are
// placeholders that get substituted once `block.gfm` exists.
const gfmTableTemplate = [
	"^ *([^\\n ].*\\|.*)\\n", // Header
	" {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?", // Align
	"(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)" // Cells
].join("");

// Same rules as the normal grammar, with the table template taking the place
// of the default `table` entry.
block.gfm = {
	...block.normal,
	table: gfmTableTemplate
};

// Constructs that end the cell rows of a table: each placeholder in the
// template's negative lookahead is replaced, in order, by the pattern that
// starts that construct.
const gfmTableInterrupts = [
	["hr", block.hr],
	["heading", " {0,3}#{1,6} "],
	["blockquote", " {0,3}>"],
	["code", " {4}[^\\n]"],
	["fences", " {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n"],
	["list", " {0,3}(?:[*+-]|1[.)]) "], // only lists starting from 1 can interrupt
	["html", "</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)"],
	["tag", block._tag] // tables can be interrupted by type (6) html blocks
];

let gfmTableBuilder = edit(block.gfm.table);
for (const [placeholder, replacement] of gfmTableInterrupts) {
	gfmTableBuilder = gfmTableBuilder.replace(placeholder, replacement);
}
block.gfm.table = gfmTableBuilder.getRegex();

// GFM paragraph rule: built from the same template as `block.paragraph`, but
// the `table` placeholder is filled with the GFM table rule instead of being
// removed. The pairs are applied in order.
block.gfm.paragraph = [
	["hr", block.hr],
	["heading", " {0,3}#{1,6} "],
	["|lheading", ""], // setext headings don't interrupt commonmark paragraphs
	["table", block.gfm.table], // interrupt paragraphs with table
	["blockquote", " {0,3}>"],
	["fences", " {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n"],
	["list", " {0,3}(?:[*+-]|1[.)]) "], // only lists starting from 1 can interrupt
	["html", "</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)"],
	["tag", block._tag] // paragraphs can be interrupted by type (6) html blocks
]
	.reduce(
		(builder, [placeholder, replacement]) =>
			builder.replace(placeholder, replacement),
		edit(block._paragraph)
	)
	.getRegex();
/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 *
 * Starts from `block.normal` and overrides the html, def, heading, fences,
 * lheading and paragraph rules.
 */

block.pedantic = {
	...block.normal,
	// Three alternatives: a comment, an element with a matching closing tag
	// (`\1`), or a single tag. Every `tag` placeholder becomes a word that is
	// not one of the listed inline element names.
	html: edit(
		"^ *(?:comment *(?:\\n|\\s*$)" +
			"|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)" + // closed tag
			"|<tag(?:\"[^\"]*\"|'[^']*'|\\s[^'\"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))"
	)
		.replace("comment", block._comment)
		.replace(
			/tag/g,
			"(?!(?:" +
				"a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub" +
				"|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)" +
				"\\b)\\w+(?!:|[^\\w\\s@]*@)\\b"
		)
		.getRegex(),
	// Looser definition rule: any indentation, optional angle brackets around
	// the destination, and a title delimited by `"` or parentheses.
	def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
	// No indentation allowed and no whitespace required after the `#` run.
	heading: /^(#{1,6})(.*)(?:\n+|$)/,
	fences: noopTest, // fences not supported
	// Only a single line of text may precede the underline.
	lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
	// Paragraph rule for this grammar. `block.lheading` below is the top-level
	// rule on `block`, not the `lheading` override defined just above in this
	// literal. The `fences`, `list` and `html` alternatives are removed; `table`
	// is not substituted here.
	paragraph: edit(block.normal._paragraph)
		.replace("hr", block.hr)
		// NOTE(review): "\n" in this string literal is a raw newline character,
		// unlike the "\\n" escapes used in the other replacement strings.
		.replace("heading", " *#{1,6} *[^\n]")
		.replace("lheading", block.lheading)
		.replace("blockquote", " {0,3}>")
		.replace("|fences", "")
		.replace("|list", "")
		.replace("|html", "")
		.getRegex()
};

/**
 * Inline-Level Grammar
 *
 * Rule templates for inline constructs. Bare words such as `scheme`, `email`,
 * `comment`, `attribute`, `label`, `href`, `title`, `ref`, `punct` and
 * `punctuation` are placeholders substituted by the `edit(...)` calls that
 * follow this object.
 */
export const inline = {
	// A backslash followed by one of the listed punctuation characters (group 1).
	escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
	// `<scheme:...>` or `<email>`; group 1 is the text between the brackets.
	autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
	// Set to `noopTest` here; `inline.gfm` overrides it.
	url: noopTest,
	// Template string listing the kinds of raw inline markup.
	tag:
		"^comment" +
		"|^</[a-zA-Z][\\w:-]*\\s*>" + // closing tag
		"|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>" + // open tag
		"|^<\\?[\\s\\S]*?\\?>" + // processing instruction, e.g. <?php ?>
		"|^<![a-zA-Z]+\\s[\\s\\S]*?>" + // declaration, e.g. <!DOCTYPE html>
		"|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>", // CDATA section
	// `[label](href title)`, optionally preceded by `!`.
	link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
	// `[label][ref]`, optionally preceded by `!`.
	reflink: /^!?\[(label)\]\[(ref)\]/,
	// `[ref]` or `[ref][]`, optionally preceded by `!`.
	nolink: /^!?\[(ref)\](?:\[\])?/,
	// Template combining the two rules above; a `nolink` match must not be
	// followed by `(`.
	reflinkSearch: "reflink|nolink(?!\\()",
	emStrong: {
		// A run of `*` or `_` at the current position that can open emphasis.
		lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
		// The labels in the two comment lines below name the alternatives of
		// `rDelimAst` in the order they appear in the pattern.
		//        (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left.  (5) and (6) can be either Left or Right.
		//          () Skip orphan inside strong                                      () Consume to delim     (1) #***                (2) a***#, a***                             (3) #***a, ***a                 (4) ***#              (5) #***#                 (6) a***a
		rDelimAst:
			/^(?:[^_*\\]|\\.)*?\_\_(?:[^_*\\]|\\.)*?\*(?:[^_*\\]|\\.)*?(?=\_\_)|(?:[^*\\]|\\.)+(?=[^*])|[punct_](\*+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|(?:[^punct*_\s\\]|\\.)(\*+)(?=[^punct*_\s])/,
		// Same layout for `_`, without alternative (6).
		rDelimUnd:
			/^(?:[^_*\\]|\\.)*?\*\*(?:[^_*\\]|\\.)*?\_(?:[^_*\\]|\\.)*?(?=\*\*)|(?:[^_\\]|\\.)+(?=[^_])|[punct*](\_+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
	},
	// Code span: opening backtick run (group 1), content (group 2), and the same
	// run again, not followed by another backtick.
	code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
	// Two or more spaces, or a backslash, before a newline that is not followed
	// only by whitespace up to the end of the input.
	br: /^( {2,}|\\)\n(?!\s*$)/,
	// Set to `noopTest` here; `inline.gfm` overrides it.
	del: noopTest,
	// Plain text up to the next character that may start another inline rule,
	// or up to a hard line break.
	text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
	// One whitespace or `punctuation` character (placeholder).
	punctuation: /^([\spunctuation])/
};

// Punctuation marks from the CommonMark spec, written for use inside a
// character class. `*` and `_` are left out on purpose so that the two
// emphasis markers can be handled separately.
inline._punctuation = "!\"#$%&'()+\\-.,/:;<=>?@\\[\\]`^{|}~";

// Replace every `punctuation` placeholder with that character list.
const punctuationTemplate = edit(inline.punctuation);
inline.punctuation = punctuationTemplate
	.replace(/punctuation/g, inline._punctuation)
	.getRegex();

// Sequences that emphasis matching should skip over: [title](link), `code`
// and <html>. Global flag: matches every occurrence.
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
// Backslash-escaped `*` or `_`. A lookbehind version would avoid consuming the
// preceding character, but lookbehind is not available on Safari as of
// version 16, so the alternative below is kept for reference only:
// inline.escapedEmSt = /(?<=(?:^|[^\\)(?:\\[^])*)\\[*_]/g;
// The pattern in use matches the start of input or a non-backslash character,
// then any number of escaped backslashes, then the escaped marker.
inline.escapedEmSt = /(?:^|[^\\])(?:\\\\)*\\[*_]/g;

// Inline comments must be closed: starting from the block-level comment rule,
// the "`-->` or end of input" terminator is narrowed to `-->` only.
const inlineCommentTemplate = edit(block._comment);
inline._comment = inlineCommentTemplate
	.replace("(?:-->|$)", "-->")
	.getRegex();

// Expand the `punct` placeholder in the three emphasis delimiter templates.
const { emStrong } = inline;

emStrong.lDelim = edit(emStrong.lDelim)
	.replace(/punct/g, inline._punctuation)
	.getRegex();

// Both right-delimiter rules are compiled with the global flag.
for (const delimiterName of ["rDelimAst", "rDelimUnd"]) {
	emStrong[delimiterName] = edit(emStrong[delimiterName], "g")
		.replace(/punct/g, inline._punctuation)
		.getRegex();
}

// Global, unanchored counterpart of `inline.escape`: a backslash followed by
// one of the same punctuation characters (group 1), anywhere in a string.
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

// URI scheme: a letter followed by 1 to 31 letters, digits, `+`, `.` or `-`.
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
// E-mail address; group 1 captures the `@`. Each domain label is at most 63
// characters long and may not start or end with `-`.
inline._email =
	/[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

// Fill the `scheme` and `email` placeholders of the autolink template.
const autolinkWithScheme = edit(inline.autolink).replace(
	"scheme",
	inline._scheme
);
inline.autolink = autolinkWithScheme
	.replace("email", inline._email)
	.getRegex();

// A single attribute inside an inline tag: whitespace, a name, and an optional
// double-quoted, single-quoted or unquoted value.
inline._attribute =
	/\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

// Fill the `comment` and `attribute` placeholders of the tag template, in order.
const inlineTagSubstitutions = [
	["comment", inline._comment],
	["attribute", inline._attribute]
];
inline.tag = inlineTagSubstitutions
	.reduce(
		(builder, [placeholder, pattern]) => builder.replace(placeholder, pattern),
		edit(inline.tag)
	)
	.getRegex();

// Link text: may contain one level of bracketed text, escapes and code spans.
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
// Link destination: `<...>` without newlines, or a run of characters that are
// neither whitespace nor control characters.
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
// Link title: double-quoted, single-quoted or parenthesised.
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

// Each placeholder `x` in the link template is filled from `inline._x`.
let linkBuilder = edit(inline.link);
for (const part of ["label", "href", "title"]) {
	linkBuilder = linkBuilder.replace(part, inline[`_${part}`]);
}
inline.link = linkBuilder.getRegex();

// Reference names (`ref`) reuse the block-level label pattern.
const referenceLabel = block._label;

// `[label][ref]`
const reflinkWithLabel = edit(inline.reflink).replace("label", inline._label);
inline.reflink = reflinkWithLabel.replace("ref", referenceLabel).getRegex();

// `[ref]` / `[ref][]`
const nolinkTemplate = edit(inline.nolink);
inline.nolink = nolinkTemplate.replace("ref", referenceLabel).getRegex();

// Global search rule built from the finished `reflink` and `nolink` rules;
// each placeholder is filled from the rule of the same name.
let reflinkSearchBuilder = edit(inline.reflinkSearch, "g");
for (const ruleName of ["reflink", "nolink"]) {
	reflinkSearchBuilder = reflinkSearchBuilder.replace(
		ruleName,
		inline[ruleName]
	);
}
inline.reflinkSearch = reflinkSearchBuilder.getRegex();

/**
 * Normal Inline Grammar
 *
 * Shallow copy of every property assigned to `inline` up to this point. The
 * `pedantic` and `gfm` grammars below start from this copy.
 */

inline.normal = { ...inline };

/**
 * Pedantic Inline Grammar
 *
 * Starts from `inline.normal`, adds separate `strong` and `em` rule sets and
 * replaces the `link` and `reflink` rules.
 */

inline.pedantic = {
	...inline.normal,
	strong: {
		// NOTE(review): `^` anchors only the `__` alternative; `\*\*` can match
		// anywhere in the input. The same holds for `em.start` below.
		start: /^__|\*\*/,
		// Content must begin and end with a non-whitespace character.
		middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
		// Closing delimiters, searched globally.
		endAst: /\*\*(?!\*)/g,
		endUnd: /__(?!_)/g
	},
	em: {
		start: /^_|\*/,
		// The empty group `()` in the first alternative shifts the `*` content to
		// group 2; the `_` content is captured in group 3.
		middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
		endAst: /\*(?!\*)/g,
		endUnd: /_(?!_)/g
	},
	// `[label](anything)`: group 2 is taken lazily up to the first `)`.
	link: edit(/^!?\[(label)\]\((.*?)\)/)
		.replace("label", inline._label)
		.getRegex(),
	// `[label] [ref]`: whitespace is allowed between the two bracket pairs.
	reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
		.replace("label", inline._label)
		.getRegex()
};

/**
 * GFM Inline Grammar
 *
 * Starts from `inline.normal` and overrides `escape`, `url`, `del` and `text`;
 * `_extended_email` and `_backpedal` are additional helper patterns.
 */

inline.gfm = {
	...inline.normal,
	// Rewrites the closing `])` of the base escape rule to `~|])`.
	// NOTE(review): the base character class already lists `~` and `|`, so this
	// looks redundant — confirm before changing.
	escape: edit(inline.escape).replace("])", "~|])").getRegex(),
	// E-mail address for bare autolinks; group 1 captures the `@`.
	_extended_email:
		/[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
	// Bare URL starting with `ftp://`, `http://`, `https://` or `www.`, or an
	// e-mail address (`email` is a placeholder filled right after this object).
	url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
	// Matches the leading part of a string while leaving trailing punctuation
	// and a trailing entity reference unmatched (presumably used to trim
	// autolinked URLs — confirm at the call site).
	_backpedal:
		/(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/,
	// `~text~` or `~~text~~`: group 1 is the delimiter, group 2 the content,
	// which may not start or end with whitespace or `~`.
	del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
	// Like the base `text` rule, but additionally stops before `~`, before
	// `http://`, `https://`, `ftp://`, `www.`, and before an e-mail local part.
	text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
};

// Compile the GFM url rule case-insensitively, with its `email` placeholder
// filled by the extended e-mail pattern.
const { gfm: gfmInline } = inline;
const gfmUrlTemplate = edit(gfmInline.url, "i");
gfmInline.url = gfmUrlTemplate
	.replace("email", gfmInline._extended_email)
	.getRegex();
/**
 * GFM + Line Breaks Inline Grammar
 */

// Line break rule with the `{2,}` quantifier relaxed to `*`.
const breaksBr = edit(inline.br).replace("{2,}", "*").getRegex();

// GFM text rule that also stops before a run of spaces followed by a newline;
// every `{2,}` quantifier is relaxed to `*` afterwards.
const breaksText = edit(inline.gfm.text)
	.replace("\\b_", "\\b_| {2,}\\n")
	.replace(/\{2,\}/g, "*")
	.getRegex();

inline.breaks = {
	...inline.gfm,
	br: breaksBr,
	text: breaksText
};
